# Download the WHO data set from the course repository and print a compact
# overview of its columns.
url_prefix <- "https://raw.githubusercontent.com/michael-franke/intro-data-analysis/master/data_sets/"
WHO_data_url <- paste0(url_prefix, "WHO.csv")
d <- read_csv(file = WHO_data_url)
d %>% glimpse()
## Observations: 194
## Variables: 13
## $ Country <chr> "Afghanistan", "Albania", "Algeria…
## $ Region <chr> "Eastern Mediterranean", "Europe",…
## $ Population <dbl> 29825, 3162, 38482, 78, 20821, 89,…
## $ Under15 <dbl> 47.42, 21.33, 27.42, 15.20, 47.58,…
## $ Over60 <dbl> 3.82, 14.93, 7.17, 22.86, 3.84, 12…
## $ FertilityRate <chr> "\r5.4\r", "\r1.75\r", "\r2.83\r",…
## $ LifeExpectancy <dbl> 60, 74, 73, 82, 51, 75, 76, 71, 82…
## $ ChildMortality <dbl> 98.5, 16.7, 20.0, 3.2, 163.5, 9.9,…
## $ CellularSubscribers <dbl> 54.26, 96.39, 98.99, 75.49, 48.38,…
## $ LiteracyRate <chr> NA, NA, NA, NA, "\r70.1\r", "\r99.…
## $ GNI <chr> "\r1140\r", "\r8820\r", "\r8310\r"…
## $ PrimarySchoolEnrollmentMale <chr> NA, NA, "\r98.2\r", "\r78.4\r", "\…
## $ PrimarySchoolEnrollmentFemale <chr> NA, NA, "\r96.4\r", "\r79.4\r", "\…
# Bar plot: one bar per region, bar height = number of rows (countries) in `d`.
ggplot(data = d, mapping = aes(x = Region)) +
  geom_bar() +
  labs(
    y = "Number of countries in data set",
    x = "Region"
  )
# Column plot of the number of countries per region, with regions ordered
# from the largest to the smallest count. The plot object is stored under
# `countries_per_region` and then displayed.
countries_per_region <- d %>%
  count(Region, name = "n_countries") %>%
  ggplot(
    mapping = aes(
      x = fct_reorder(Region, n_countries, .desc = TRUE),
      y = n_countries
    )
  ) +
  geom_col() +
  labs(
    title = "Countries per region",
    y = "Number of countries in data set",
    x = "Region"
  )
countries_per_region
# Column plot of the summed `Population` of all countries in each region.
# The plot object is stored under `population_per_region` and then displayed.
population_per_region <- d %>%
  group_by(Region) %>%
  summarise(total_population = sum(Population)) %>%
  ggplot(aes(y = total_population, x = Region)) +
  geom_col() +
  labs(
    title = "Population per region",
    y = "Population",
    x = "Region"
  )
population_per_region
# Stack the two region plots vertically in a single figure.
plot_grid(
  plotlist = list(countries_per_region, population_per_region),
  nrow = 2
)
# Per-region summary of ChildMortality: minimum, quartiles, mean and maximum.
d %>%
  group_by(Region) %>%
  summarise(
    Min = min(ChildMortality),
    `0.25_quant` = quantile(ChildMortality, probs = 0.25),
    `0.5_quant` = quantile(ChildMortality, probs = 0.5),
    mean = mean(ChildMortality),
    `0.75_quant` = quantile(ChildMortality, probs = 0.75),
    Max = max(ChildMortality)
  )
## # A tibble: 6 x 7
## Region Min `0.25_quant` `0.5_quant` mean `0.75_quant` Max
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Africa 13.1 58.6 81.8 84.0 102. 182.
## 2 Americas 5.3 13.0 17.5 19.3 22.4 75.6
## 3 Eastern Mediterr… 7.4 11.2 18.4 40.2 69.8 147.
## 4 Europe 2.2 3.8 4.8 10.1 10.7 58.3
## 5 South-East Asia 9.6 21 40.9 35.0 48.4 56.7
## 6 Western Pacific 2.9 9.55 22.4 24.7 34.1 71.8
# Add each region's mean child mortality as a column on every row, so that
# regions can be ordered by it when plotting.
WHO_data <- d %>%
  group_by(Region) %>%
  mutate(mean_cm = mean(ChildMortality)) %>%
  ungroup()

# Violin plot of child mortality per region, regions ordered by `mean_cm`.
ggplot(data = WHO_data) +
  geom_violin(
    mapping = aes(x = fct_reorder(Region, mean_cm), y = ChildMortality)
  ) +
  labs(x = "Region")
# Apply bootstrapped_CI() to the ChildMortality values of each region and
# unnest the results into columns next to `Region`.
# NOTE(review): bootstrapped_CI() is defined outside this chunk; the final
# plot of this script reads columns `lower`, `mean` and `upper` from
# `ci_means_cm`, so it presumably returns a one-row table with those
# columns -- confirm against its definition.
ci_means_cm <- d %>%
  group_by(Region) %>%
  nest() %>%
  summarise(
    # `region_data` holds one region's rows; only ChildMortality is used, so
    # no per-region mean column needs to be added before nesting.
    CIs = map(
      data,
      function(region_data) bootstrapped_CI(region_data$ChildMortality)
    )
  ) %>%
  unnest(CIs)
# Violin plot of child mortality per region (regions ordered by `mean_cm`,
# filled by region, no legend), overlaid with a point range per region taken
# from the `lower` / `mean` / `upper` columns of `ci_means_cm`.
ggplot(data = WHO_data) +
  geom_violin(
    mapping = aes(
      x = fct_reorder(Region, mean_cm),
      y = ChildMortality,
      fill = Region
    ),
    show.legend = FALSE
  ) +
  geom_pointrange(
    data = ci_means_cm,
    mapping = aes(x = Region, y = mean, ymin = lower, ymax = upper)
  ) +
  labs(x = "Region")